// 文档 https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy fast DS
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_fast_DS] -in
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "-in": first stage of the network. It hooks LUMA, reads LUMA, and saves
// its result as the texture `in`, which is 3x as wide as LUMA, the same height,
// and has 4 components per texel.
// The WHEN line is a reverse-Polish expression: the pass runs only if
// (OUTPUT.w > LUMA.w * 1.2) and (OUTPUT.h > LUMA.h * 1.2); the two comparison
// results are combined with `*`.
// NOTE: keep the directive lines above contiguous and do not put the directive
// marker inside ordinary comments -- the host parser keys on it.
//
// Precision selection: request the explicit float16 types; if the extension
// macro is defined use half-precision vector/matrix/scalar types, otherwise
// fall back to the regular 32-bit float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): red channel of the LUMA texel at pos + (x, y), scaled by LUMA_mul
// and converted to F. The coordinate is clamped to [0, sz], so reads past the
// image edge repeat the border texel. The `* ivec2(1, 1) + ivec2(0, 0)` part is
// an identity scale/offset. Expects `pos` and `sz` to be in scope where the
// macro is expanded (hook() defines both).
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Workgroup-shared 10x10 tile of luma samples (one plane). hook() indexes it
// with xy + 0..2 where xy is in 0..7, i.e. an 8x8 area plus a 1-texel border.
shared F G[1][10][10];
// Entry point of pass "-in".
// Each invocation handles one LUMA pixel: it reads the pixel's 3x3 neighbourhood
// from the shared tile G, accumulates 9 taps x 3 constant 4-vectors into 12
// output channels (r0, r1, r2), adds a per-channel bias, clamps negatives to
// zero, and writes the three 4-channel results to three horizontally adjacent
// texels of the output image.
// NOTE(review): out_image, LUMA_raw, LUMA_mul and LUMA_size are not declared in
// this file; presumably they are injected by the host for this pass -- confirm
// against the mpv user-shader documentation.
void hook() {
	// xy:   invocation coordinate inside the workgroup.
	// pos:  source pixel handled by this invocation (workgroup origin is
	//       gl_WorkGroupID * 8).
	// opos: first of the three output texels for this pixel (x is tripled).
	// sz:   largest valid LUMA coordinate, used by l0() for clamping.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the 10x10 tile. Each invocation tries offsets 0 and 8 on both axes
	// and stops once the tile index reaches 10, so only invocations with a
	// local coordinate below 2 on an axis load the extra rows/columns.
	// The (x - 1, y - 1) shift means G[0][j][i] holds the pixel at
	// workgroup origin + (i - 1, j - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// Wait until every invocation of the workgroup has written its tile entries.
	barrier();
	// s0_<row>_<col>: 3x3 neighbourhood taken from the tile; s0_1_1 is this
	// invocation's own pixel.
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// Weighted sum: for every tap, one constant 4-vector per accumulator.
	// The constants are generated weights; do not reorder the accumulations.
	r0 += V4(4.041e-02, 9.762e-02, 2.171e-02, 8.674e-02) * s0_0_0;
	r1 += V4(1.336e-02, -5.773e-02, -9.529e-02, 7.143e-02) * s0_0_0;
	r2 += V4(8.325e-02, 1.928e-02, -9.564e-02, -5.571e-01) * s0_0_0;
	r0 += V4(-6.575e-02, -1.562e-02, 1.012e+00, 3.738e-01) * s0_0_1;
	r1 += V4(-5.832e-02, 3.951e-01, -2.196e-01, -8.078e-02) * s0_0_1;
	r2 += V4(2.241e-01, 1.606e-01, 2.095e-01, -1.899e-01) * s0_0_1;
	r0 += V4(2.574e-02, -8.587e-02, 5.164e-02, -7.524e-02) * s0_0_2;
	r1 += V4(9.939e-01, 5.512e-01, 5.513e-03, 8.552e-04) * s0_0_2;
	r2 += V4(-3.983e-02, -2.604e-01, -1.028e-01, 1.299e-01) * s0_0_2;
	r0 += V4(9.906e-01, 3.528e-02, -6.959e-02, 2.416e-01) * s0_1_0;
	r1 += V4(-2.832e-02, 1.157e-01, -2.183e-01, -8.136e-02) * s0_1_0;
	r2 += V4(3.729e-01, -4.742e-03, -6.080e-02, 2.169e-01) * s0_1_0;
	r0 += V4(-9.435e-01, 8.926e-01, -8.849e-01, 4.545e-03) * s0_1_1;
	r1 += V4(-7.676e-01, -9.239e-01, 7.246e-01, -9.115e-01) * s0_1_1;
	r2 += V4(-1.050e-01, 7.291e-01, 7.910e-01, 8.643e-01) * s0_1_1;
	r0 += V4(-4.442e-02, -2.619e-02, -1.304e-01, -3.640e-01) * s0_1_2;
	r1 += V4(-1.055e-01, -6.269e-01, 1.415e-02, 3.846e-02) * s0_1_2;
	r2 += V4(4.092e-02, -6.295e-01, -1.182e-01, -1.678e-01) * s0_1_2;
	r0 += V4(-3.576e-02, -9.187e-01, 5.371e-02, -1.712e-01) * s0_2_0;
	r1 += V4(-1.685e-03, 5.898e-02, 5.844e-03, 8.580e-03) * s0_2_0;
	r2 += V4(1.522e-02, -5.661e-02, -2.173e-01, 1.233e-01) * s0_2_0;
	r0 += V4(2.638e-02, -9.028e-02, -1.191e-01, -5.175e-02) * s0_2_1;
	r1 += V4(-3.072e-02, 3.127e-01, -6.369e-02, 9.259e-02) * s0_2_1;
	r2 += V4(-5.195e-02, 7.929e-02, -2.329e-01, -7.104e-02) * s0_2_1;
	r0 += V4(9.126e-03, 1.058e-01, 6.688e-02, 2.216e-02) * s0_2_2;
	r1 += V4(1.929e-03, 1.770e-01, 2.155e-02, 8.600e-01) * s0_2_2;
	r2 += V4(2.261e-02, -2.613e-02, -1.607e-02, -3.508e-01) * s0_2_2;
	// Per accumulator: add the bias vector, clamp negatives to zero, then
	// store as 32-bit vec4 at output texel opos + (0|1|2, 0).
	r0 += V4(-3.838e-03, -7.199e-04, -1.880e-03, 1.684e-02);
	r0 = max(r0, V4(0.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(3.019e-03, 1.704e-02, 1.492e-02, -4.056e-03);
	r1 = max(r1, V4(0.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(4.259e-04, 1.454e-02, 1.021e-02, -2.542e-03);
	r2 = max(r2, V4(0.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_fast_DS] -conv1
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "-conv1": reads the texture `in` (LUMA is bound as well and provides
// LUMA_size) and saves its result as `conv1`, again 3x LUMA width, LUMA height,
// 4 components per texel.
// The WHEN line is a reverse-Polish expression: the pass runs only if
// (OUTPUT.w > LUMA.w * 1.2) and (OUTPUT.h > LUMA.h * 1.2).
// NOTE: keep the directive lines above contiguous and do not put the directive
// marker inside ordinary comments -- the host parser keys on it.
//
// Precision selection: half-precision types when the float16 extension macro is
// defined, regular 32-bit float types otherwise.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0 / l1 / l2 (x, y): the first / second / third 4-channel texel that `in`
// stores for the source pixel at pos + (x, y). The pixel coordinate is clamped
// to [0, sz] first, then mapped to texel x = 3 * pixel.x + {0, 1, 2}; the value
// is scaled by in_mul and converted to V4. Expects `pos` and `sz` to be in
// scope where the macros are expanded (hook() defines both).
#define l0(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile: 3 planes (one per macro above) of 10x10 V4 samples.
// hook() indexes each plane with xy + 0..2 where xy is in 0..7.
shared V4 G[3][10][10];
// Entry point of pass "-conv1".
// Each invocation handles one source pixel: it reads the pixel's 3x3
// neighbourhood from the three shared planes of G (12 input channels per tap),
// multiplies every tap by three constant 4x4 matrices, accumulates into r0, r1
// and r2 (12 output channels), adds a per-channel bias, clamps negatives to
// zero, and writes the three results to three horizontally adjacent texels.
// NOTE(review): out_image, in_raw, in_mul and LUMA_size are not declared in this
// file; presumably they are injected by the host for this pass -- confirm
// against the mpv user-shader documentation.
void hook() {
	// xy:   invocation coordinate inside the workgroup.
	// pos:  source pixel handled by this invocation (workgroup origin is
	//       gl_WorkGroupID * 8).
	// opos: first of the three output texels for this pixel (x is tripled).
	// sz:   largest valid pixel coordinate, used by l0/l1/l2 for clamping.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the 10x10 tile for all three planes. Each invocation tries offsets 0
	// and 8 on both axes and stops once the tile index reaches 10, so only
	// invocations with a local coordinate below 2 on an axis load the extra
	// rows/columns. G[p][j][i] holds the pixel at workgroup origin + (i - 1, j - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Wait until every invocation of the workgroup has written its tile entries.
	barrier();
	// s0_* / s1_*: 3x3 neighbourhoods (<row>_<col>) from planes 0 and 1;
	// the *_1_1 entries belong to this invocation's own pixel.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 taps: one 4x4 weight matrix per (tap, accumulator) pair.
	// The constants are generated weights; do not reorder the accumulations.
	r0 += M4(-5.793e-02, 1.437e-01, -5.624e-02, -2.114e-01, 1.239e-02, -7.931e-02, 3.517e-01, -7.483e-02, -7.817e-02, 8.176e-02, 1.107e-01, -1.419e-01, 1.799e-01, -1.982e-02, -3.271e-01, 3.269e-01) * s0_0_0;
	r1 += M4(-2.583e-01, 1.209e-01, 2.470e-01, 4.268e-01, 9.781e-02, -1.306e-01, 7.007e-02, -1.492e-01, -6.958e-02, 1.060e-01, 1.642e-01, 3.573e-01, 1.628e-02, 5.359e-02, -7.962e-02, -2.816e-02) * s0_0_0;
	r2 += M4(-6.912e-02, -3.226e-01, 1.221e-01, -2.013e-02, 1.068e-01, -4.246e-01, -2.715e-02, -1.831e-01, -3.174e-01, -3.733e-01, 1.941e-01, -6.484e-02, 7.920e-01, 1.910e-01, -2.347e-01, -3.281e-02) * s0_0_0;
	r0 += M4(-9.144e-01, -2.551e-01, 8.270e-01, -2.224e-01, -5.791e-01, 6.883e-02, 4.215e-01, 2.881e-01, -3.261e-01, -1.897e-01, -1.381e-01, 1.788e-02, -2.375e-01, 1.929e-01, -7.566e-01, 1.873e-02) * s0_0_1;
	r1 += M4(-6.167e-01, 8.290e-03, -1.164e+00, 5.918e-01, -7.962e-02, -6.844e-01, -2.755e-01, -1.369e-01, -2.666e-01, -1.141e-01, -7.363e-01, 1.719e-01, 2.205e-01, -5.678e-02, -2.590e-01, -1.307e-01) * s0_0_1;
	r2 += M4(-1.740e-02, -4.053e-01, 6.158e-01, -1.900e-01, 2.660e-02, -6.230e-01, -2.625e-01, -5.552e-01, -1.060e-01, 4.696e-02, 5.771e-02, 7.125e-02, 1.742e-01, -7.998e-02, -5.895e-01, 9.429e-02) * s0_0_1;
	r0 += M4(-7.630e-01, -2.434e-01, -7.545e-02, 3.426e-01, -7.594e-01, -6.681e-02, 4.800e-01, 3.129e-01, -3.156e-01, -2.881e-01, -5.194e-01, -1.130e-01, 3.195e-01, -1.436e-02, 1.365e-01, -8.451e-02) * s0_0_2;
	r1 += M4(-7.848e-02, -2.095e-01, -8.613e-01, 2.246e-01, 1.924e-01, -6.884e-01, 8.730e-01, 2.739e-01, -1.938e-01, -5.877e-02, 1.743e-01, 1.052e-01, 1.864e-01, 1.143e-01, 5.163e-01, -1.273e-01) * s0_0_2;
	r2 += M4(8.928e-01, -8.917e-03, 7.071e-03, -2.401e-02, 7.466e-01, -2.662e-01, 8.652e-01, -3.864e-01, -1.192e-01, 5.422e-02, 9.368e-03, 2.023e-01, -4.098e-01, 3.408e-01, 1.175e-01, -5.626e-02) * s0_0_2;
	r0 += M4(-1.674e-01, 4.093e-01, -3.985e-01, -3.958e-02, -7.056e-02, 2.259e-02, 3.918e-01, 1.772e-01, -2.949e-02, -7.605e-02, -3.259e-01, -3.547e-01, 7.530e-01, -7.234e-02, -2.794e-01, 5.098e-01) * s0_1_0;
	r1 += M4(1.472e-02, 2.133e-01, 9.279e-02, 2.587e-01, 4.651e-02, 9.096e-02, 2.007e-01, 1.085e-01, 7.451e-02, 3.506e-01, 7.350e-01, 7.936e-01, -2.760e-01, 7.826e-02, 5.889e-01, -3.818e-01) * s0_1_0;
	r2 += M4(-7.395e-02, 4.412e-01, -2.676e-01, 1.914e-01, 1.044e-01, -3.040e-01, -6.418e-02, -1.664e-01, -9.579e-01, -9.482e-02, -3.173e-01, 4.307e-02, -2.677e-01, 1.257e-01, 1.356e-01, 1.795e-01) * s0_1_0;
	r0 += M4(-5.566e-01, 3.013e-01, -7.747e-02, 6.084e-02, -5.854e-01, 2.211e-02, 8.184e-01, -4.767e-02, -7.793e-01, -2.308e-01, 4.798e-01, 2.201e-01, -4.292e-01, 4.666e-01, 7.555e-01, -3.262e-02) * s0_1_1;
	r1 += M4(-3.184e-01, -5.334e-01, 5.407e-01, 1.924e+00, -2.295e-01, 3.370e-01, 3.039e-01, 2.573e-01, 1.546e-01, -5.643e-01, -5.847e-01, -2.725e-01, -2.209e-01, -8.191e-02, -8.451e-01, 1.276e-01) * s0_1_1;
	r2 += M4(-5.870e-01, -1.328e-01, -2.546e+00, -1.731e-02, -8.496e-01, -3.189e-01, -8.829e-01, -3.296e-02, -1.137e+00, -2.618e-02, 1.456e-01, 4.182e-01, 7.027e-02, -8.150e-02, -1.020e-02, 1.740e-01) * s0_1_1;
	r0 += M4(4.578e-01, -1.277e+00, 1.108e+00, 3.026e-01, -1.135e-01, -4.822e-02, 4.722e-01, 6.707e-01, -8.709e-03, -1.130e+00, -8.226e-02, 3.917e-02, -1.173e-02, 4.348e-01, -8.259e-02, 2.105e-01) * s0_1_2;
	r1 += M4(5.430e-01, -7.593e-02, -6.530e-01, -5.584e-02, 1.446e-01, -2.666e-01, 4.734e-01, -5.937e-01, 1.247e-01, -3.194e-01, -2.187e-01, 9.619e-02, 1.156e-01, 5.714e-02, 7.781e-01, -3.513e-01) * s0_1_2;
	r2 += M4(-3.953e-01, -5.922e-02, -2.658e-01, 3.595e-01, -3.741e-02, -2.881e-01, 4.929e-02, -6.991e-02, 4.054e-01, -1.745e-01, -1.172e-01, 1.048e-01, -2.291e-01, 1.794e-01, -1.096e-01, -9.698e-02) * s0_1_2;
	r0 += M4(-4.563e-02, 1.857e-01, -1.357e-01, -8.791e-02, 5.900e-02, 9.665e-02, 3.228e-02, -6.163e-02, 7.805e-02, 1.282e-01, -2.803e-01, -4.851e-01, 1.249e-01, -2.914e-01, 1.291e-01, 3.360e-01) * s0_2_0;
	r1 += M4(-8.223e-02, 9.590e-02, -6.355e-02, -7.048e-01, -4.231e-02, 5.209e-02, 3.164e-02, -1.577e-01, 1.105e-01, 2.338e-01, 2.528e-01, -4.056e+00, 2.703e-02, -1.890e-01, -4.970e-01, 5.371e-01) * s0_2_0;
	r2 += M4(1.362e-01, 2.915e-01, 2.494e-01, 2.440e-01, 2.478e-01, -6.189e-02, -2.056e-01, -2.928e-02, 4.298e-02, 3.366e-01, -1.155e+00, 1.861e-01, -5.776e-01, 2.144e-01, 3.928e-01, 6.699e-02) * s0_2_0;
	r0 += M4(4.797e-01, 2.636e-01, -4.968e-01, -6.360e-01, 1.711e-02, 2.865e-01, 2.516e-02, -1.186e-01, 3.340e-01, -5.334e-02, -1.058e-01, -5.955e-03, -3.566e-01, -4.718e-01, -2.370e-01, 2.941e-02) * s0_2_1;
	r1 += M4(2.275e-01, 2.419e-01, -1.898e-01, -2.961e+00, -5.514e-02, 9.337e-02, 1.427e-01, -5.465e-01, -1.236e-01, -1.748e-01, 2.343e-01, -9.444e-01, -3.799e-01, 7.420e-02, -4.695e-02, 1.690e-01) * s0_2_1;
	r2 += M4(7.827e-02, -1.106e-01, -1.081e+00, 1.246e-01, 5.839e-02, -8.716e-02, -2.274e-01, 1.062e-01, -2.935e-01, -3.069e-02, -1.647e+00, 6.223e-02, 4.003e-01, 2.450e-01, 9.356e-01, 4.308e-01) * s0_2_1;
	r0 += M4(3.564e-01, 1.714e-01, -4.104e-01, -5.176e-01, 1.440e-01, 1.086e-01, 9.368e-02, -5.797e-02, 2.159e-01, -5.217e-01, 4.804e-01, 5.459e-02, -4.991e-01, 7.177e-01, -3.506e-01, -3.810e-01) * s0_2_2;
	r1 += M4(1.890e-01, 1.471e-02, -3.548e-03, -4.460e-01, 6.173e-02, 8.861e-02, 5.269e-02, -3.833e-01, 1.899e-01, -5.723e-02, -1.799e-02, -2.577e-01, -2.382e-01, 2.489e-01, 6.565e-01, -5.140e-02) * s0_2_2;
	r2 += M4(5.460e-01, -3.154e-01, 1.737e-01, -1.760e-01, 8.758e-02, -2.084e-01, -1.411e-01, -1.656e-01, 5.137e-01, -1.637e-01, 8.533e-02, -2.110e-01, -3.249e-01, 3.228e-01, 1.539e-01, 3.799e-03) * s0_2_2;
	// Plane 1 taps.
	r0 += M4(2.378e-02, 2.124e-01, 3.130e-01, -4.807e-02, 2.543e-01, -4.206e-01, 2.016e-01, 1.635e-03, 1.302e-01, -1.142e-01, 2.047e-01, 1.261e-01, -5.733e-01, 2.441e-01, 4.393e-02, -1.892e-01) * s1_0_0;
	r1 += M4(2.352e-02, 2.913e-02, -1.353e-02, -1.572e-01, 4.252e-01, 1.216e-02, 4.087e-01, -1.500e-01, 2.951e-01, 8.932e-02, 5.264e-02, -5.840e-01, -6.322e-02, -5.531e-02, -8.932e-02, 3.185e-01) * s1_0_0;
	r2 += M4(-4.753e-02, -3.840e-01, -1.124e-01, -2.123e-01, 2.920e-01, 9.506e-02, -2.545e-02, -1.509e-01, -1.153e-01, 2.633e-01, -1.646e-01, -1.948e-01, -1.130e+00, 1.617e-01, 1.858e-01, 1.548e-01) * s1_0_0;
	r0 += M4(-1.962e-01, 2.520e-02, 4.568e-02, 6.318e-02, 3.145e-01, 1.768e-01, 8.268e-01, -9.957e-03, 4.391e-01, 3.093e-01, -6.162e-01, -9.576e-02, 7.885e-02, -3.447e-01, -2.338e-01, 1.420e-01) * s1_0_1;
	r1 += M4(1.518e-01, -1.861e-01, -6.892e-01, -1.664e-01, 5.594e-02, 2.422e-01, 5.440e-01, 1.958e-01, 4.709e-01, 1.364e-01, -4.880e-01, -1.723e-01, 4.833e-01, 6.902e-02, -9.097e-01, 1.864e-01) * s1_0_1;
	r2 += M4(1.973e-01, -1.969e-01, -1.138e-01, -2.397e-01, 4.193e-02, 5.100e-02, 3.971e-01, -7.230e-02, 3.895e-01, -3.506e-01, 1.234e-02, 7.940e-02, -2.719e-01, 2.725e-01, -2.123e-02, -1.089e-01) * s1_0_1;
	r0 += M4(-6.806e-02, -1.297e-01, -3.324e-02, 5.789e-02, 1.678e-01, 2.352e-01, 1.997e-01, 1.184e-01, 5.532e-02, -1.400e-01, 5.981e-01, 2.959e-02, 1.442e-01, -1.647e-01, -1.489e-01, -2.528e-01) * s1_0_2;
	r1 += M4(-9.201e-02, -5.301e-02, 1.695e-01, 7.608e-04, 9.493e-02, 6.846e-02, -2.957e-01, 1.784e-01, -1.367e-01, 1.587e-01, -3.724e-01, -2.046e-01, 5.560e-02, 7.893e-02, -2.444e-01, 6.585e-02) * s1_0_2;
	r2 += M4(1.111e-01, 4.233e-02, 1.312e-01, -1.045e-03, -6.397e-02, -8.755e-02, -8.858e-02, -1.255e-02, 5.189e-01, -1.851e-01, -5.811e-02, -9.851e-02, 3.475e-03, 8.345e-02, 8.423e-02, 1.555e-01) * s1_0_2;
	r0 += M4(-2.498e-01, -2.431e-01, -3.027e-02, -3.847e-01, -2.584e-02, 7.390e-02, 2.629e-01, -9.515e-03, -3.316e-02, -6.325e-01, 4.346e-01, 1.090e-02, -2.154e-01, 7.739e-02, -1.760e-01, -6.918e-02) * s1_1_0;
	r1 += M4(5.470e-02, 9.903e-01, 5.697e-01, 5.208e-01, -2.850e-02, -6.852e-02, 1.629e-01, -5.823e-01, -2.673e-01, -5.983e-02, -5.295e-01, -8.711e-01, -8.140e-02, 8.176e-01, 4.590e-02, -5.916e-01) * s1_1_0;
	r2 += M4(-5.561e-01, 6.840e-01, -1.260e-01, 1.762e-01, 4.394e-01, -5.004e-01, 1.015e+00, -3.291e-01, -4.471e-02, -1.025e+00, -8.195e-02, -4.353e-01, -6.169e-01, 2.239e-01, -5.421e-01, 1.812e-01) * s1_1_0;
	r0 += M4(7.757e-01, 2.465e-01, -4.435e-01, -1.458e-01, 1.774e-01, 3.860e-01, -2.528e-01, -4.051e-01, -2.937e-01, 3.373e-02, -3.759e-01, -6.443e-01, 1.617e-01, -5.429e-01, -1.685e-01, 5.996e-01) * s1_1_1;
	r1 += M4(3.541e-01, 4.807e-01, -9.086e-01, -3.684e-01, -2.231e-02, 2.368e-01, -5.329e-02, 1.900e-01, -3.223e-01, 1.035e-02, -6.130e-01, -3.550e-01, 5.308e-02, -5.571e-01, -3.878e-01, -7.013e-01) * s1_1_1;
	r2 += M4(-1.412e+00, -2.662e-02, -6.195e-01, 1.746e-01, 1.589e-01, -1.901e-01, 6.597e-02, -1.999e-01, -6.467e-01, 9.629e-01, 1.666e-01, -2.466e-01, -9.949e-01, 2.130e-01, -3.163e-01, -1.284e-01) * s1_1_1;
	r0 += M4(-8.678e-02, 4.703e-02, 1.371e-01, 5.427e-02, 1.150e-01, 3.510e-01, -5.269e-02, 1.063e-01, -8.140e-02, 9.568e-01, -4.882e-01, 5.090e-02, -4.781e-02, -5.384e-02, -1.541e-02, 1.773e-02) * s1_1_2;
	r1 += M4(3.369e-03, -3.054e-02, 2.146e-01, 1.174e-01, -3.636e-02, -2.807e-02, -7.365e-02, 9.401e-02, -1.509e-01, -9.430e-02, 5.688e-01, -4.158e-03, 1.587e-02, 5.550e-02, -1.556e-01, 9.531e-03) * s1_1_2;
	r2 += M4(-2.858e-03, -9.425e-02, 4.138e-02, -2.776e-02, -9.450e-02, -3.554e-02, 4.024e-02, -1.226e-01, -1.645e-01, 3.307e-01, -2.328e-02, 1.403e-01, 7.816e-02, 1.411e-01, 9.530e-03, 1.139e-01) * s1_1_2;
	r0 += M4(3.823e-01, -1.168e-01, -4.570e-01, -8.056e-01, -9.290e-02, -5.203e-02, 6.189e-02, 2.869e-01, 3.388e-01, 1.642e-01, 7.824e-03, 9.350e-02, 3.466e-02, 9.631e-02, -9.206e-02, -1.763e-01) * s1_2_0;
	r1 += M4(1.128e-02, -1.548e-01, -2.194e-01, 1.602e-01, 5.895e-02, 1.636e-01, -3.469e-03, -8.158e-01, 2.783e-01, -3.604e-01, 4.092e-01, 7.297e-01, -4.442e-02, -6.679e-02, -6.041e-03, -4.674e-01) * s1_2_0;
	r2 += M4(6.855e-01, 7.765e-02, 1.136e+00, -6.864e-02, -1.267e-01, -2.226e-01, 3.038e-02, -2.462e-01, 5.214e-02, -4.178e-01, -3.895e-01, -3.076e-01, -1.851e-02, 4.209e-02, -2.338e-01, 1.019e-01) * s1_2_0;
	r0 += M4(3.338e-01, -3.785e-01, -4.971e-01, -8.008e-02, 1.730e-01, -1.629e-02, -1.244e-01, 1.049e-01, 1.043e-01, 7.607e-03, 2.371e-01, 8.368e-01, -3.654e-02, 2.404e-02, -5.377e-02, -1.787e-01) * s1_2_1;
	r1 += M4(-3.069e-01, 3.120e-01, -7.055e-01, -2.520e-01, 2.459e-01, -8.016e-02, 4.696e-01, 1.252e+00, -1.844e-01, -4.125e-01, 6.832e-02, -1.945e-01, -1.060e-01, -7.293e-02, -1.543e-01, -1.193e+00) * s1_2_1;
	r2 += M4(1.317e-01, 6.183e-02, -2.024e-02, 1.682e-01, -2.379e-01, -1.816e-03, 5.784e-01, 2.426e-01, 5.501e-02, 4.575e-02, -1.247e+00, 4.247e-01, 1.704e-01, 1.446e-01, -4.304e-03, 1.257e-01) * s1_2_1;
	r0 += M4(-4.706e-02, 3.722e-02, 1.999e-01, 2.293e-02, 1.320e-02, 1.012e-01, 2.134e-01, 1.492e-01, 6.509e-02, 4.139e-01, -1.032e-01, 2.077e-02, -2.200e-02, -2.226e-02, -7.304e-02, 2.169e-02) * s1_2_2;
	r1 += M4(5.361e-03, 1.031e-01, 9.544e-02, -4.414e-02, 7.660e-02, 4.277e-03, -2.581e-02, 8.032e-02, 7.214e-02, 2.529e-01, 7.067e-01, 9.443e-01, 4.328e-03, -3.902e-02, 1.072e-01, -2.231e-01) * s1_2_2;
	r2 += M4(-1.065e-01, -5.097e-02, 1.159e-01, -6.687e-02, -3.538e-02, 3.894e-02, -2.348e-01, 1.061e-01, 2.981e-01, 2.499e-01, 8.512e-01, 1.809e-01, 5.180e-02, 8.078e-02, 9.586e-02, 5.377e-03) * s1_2_2;
	// The s0_* registers are reused here: from this point on they hold the
	// 3x3 neighbourhood of plane 2, not plane 0.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 taps.
	r0 += M4(4.806e-02, -5.717e-02, 8.821e-02, 2.588e-01, -1.417e-01, 1.973e-01, -7.111e-01, 1.484e-01, 5.379e-01, -8.655e-03, 1.581e-01, 6.440e-02, -1.582e-01, 5.965e-02, -1.851e-01, 1.486e-01) * s0_0_0;
	r1 += M4(4.208e-01, -4.398e-01, -6.450e-01, -6.734e-01, 3.400e-01, 7.016e-02, 5.733e-01, 7.057e-02, 4.472e-01, -1.149e-02, -2.588e-01, 1.024e-02, -1.227e-02, -1.458e-01, -2.577e-01, 1.609e-02) * s0_0_0;
	r2 += M4(-1.534e-01, 8.636e-01, 2.766e-01, 2.317e-01, -3.787e-02, 2.955e-01, -1.553e-01, 1.170e-01, -4.113e-01, 3.517e-01, 2.475e-01, 3.855e-02, 6.763e-02, 3.525e-01, 1.849e-01, 1.303e-01) * s0_0_0;
	r0 += M4(3.619e-02, 6.815e-01, -8.528e-01, -2.384e-01, 1.633e-01, -3.683e-01, -2.691e-02, -1.595e-01, 1.024e+00, -1.105e-01, 7.847e-02, -2.360e-01, 1.379e-01, -1.566e-02, -3.915e-01, 3.224e-02) * s0_0_1;
	r1 += M4(7.341e-02, 2.611e-01, 1.149e-02, -1.513e-01, 3.393e-02, 3.339e-01, 9.530e-01, -2.989e-02, 4.601e-01, -2.412e-01, -4.669e-01, 5.359e-01, 2.908e-01, -1.066e-01, 9.768e-02, -2.064e-02) * s0_0_1;
	r2 += M4(-2.108e-02, 7.377e-01, 1.578e-01, 6.285e-01, -5.576e-01, 2.112e-01, 2.694e-01, 1.519e-01, -5.527e-01, 1.320e-01, 3.443e-01, -1.317e-01, 1.166e-02, 3.330e-01, 2.486e-02, 1.997e-01) * s0_0_1;
	r0 += M4(5.478e-01, -1.575e-01, -5.930e-01, -1.572e-01, 2.116e-01, -3.907e-02, -4.671e-03, 7.206e-02, -5.033e-01, 3.774e-01, 3.935e-01, 5.040e-02, 4.097e-02, 2.243e-01, -9.715e-02, -2.081e-01) * s0_0_2;
	r1 += M4(1.800e-01, 9.336e-02, -3.600e-02, -4.705e-01, 1.528e-01, 3.023e-02, -4.648e-01, -2.117e-02, -3.396e-01, -1.249e-01, 4.994e-01, 3.662e-01, -2.282e-01, -2.838e-02, 1.716e-01, -1.343e-02) * s0_0_2;
	r2 += M4(1.847e-01, 1.751e-01, -3.698e-01, 1.066e-01, 7.908e-02, -1.808e-01, -1.684e-01, 2.740e-02, 3.340e-01, 2.052e-01, 1.091e-01, 1.502e-01, -3.099e-01, 2.861e-01, -7.105e-02, 1.900e-01) * s0_0_2;
	r0 += M4(7.519e-02, -6.622e-01, 7.402e-01, -8.287e-02, 5.269e-01, 7.835e-01, -1.753e-01, -4.876e-01, -1.264e-01, -8.813e-02, 1.571e-01, 2.362e-01, 1.626e-01, 5.428e-02, -2.900e-01, 2.082e-01) * s0_1_0;
	r1 += M4(4.633e-01, -3.245e-01, -1.685e-01, 3.428e-02, 5.220e-01, 1.012e-01, -5.715e-01, -1.830e+00, -2.897e-01, -3.844e-01, -6.536e-01, -5.206e-01, 2.238e-01, -3.204e-01, -1.046e-01, 8.743e-02) * s0_1_0;
	r2 += M4(1.094e-01, -1.108e+00, 2.310e-01, -7.067e-01, -1.046e+00, 4.273e-01, 1.355e-01, 6.389e-01, -8.484e-02, 3.686e-01, 5.847e-01, -5.668e-02, 3.062e-01, 4.446e-01, 1.422e-01, 2.725e-01) * s0_1_0;
	r0 += M4(-3.108e-02, -2.054e-02, -6.917e-01, -6.844e-01, -1.832e-01, -1.369e-01, -2.367e-01, -7.988e-01, 7.779e-01, 6.147e-02, -4.021e-01, -1.283e-01, 2.476e-01, -1.253e-01, -5.314e-01, -2.518e-02) * s0_1_1;
	r1 += M4(-2.656e-01, -1.121e-01, -1.218e-01, -3.049e-02, -3.418e-01, -2.314e-02, 2.059e-01, -1.826e-01, 2.131e-01, 5.128e-01, 3.841e-01, -2.905e-01, -2.967e-02, 3.408e-01, -3.535e-01, 2.279e-02) * s0_1_1;
	r2 += M4(9.081e-02, -6.919e-01, 1.941e-01, -6.889e-01, 1.322e+00, -2.783e-01, 4.108e-01, 7.252e-02, 7.483e-01, -4.656e-01, -3.942e-01, 1.397e-01, 4.725e-03, 6.387e-01, 9.933e-01, 3.975e-01) * s0_1_1;
	r0 += M4(5.600e-02, -1.743e-01, -7.446e-01, -3.718e-01, 9.680e-02, -2.067e-01, 3.751e-02, 2.093e-03, -8.911e-01, -1.424e-01, 1.395e-01, -3.356e-02, 3.086e-01, 1.206e-01, -2.971e-01, -1.952e-01) * s0_1_2;
	r1 += M4(-1.168e-01, 3.877e-01, 5.914e-01, 4.269e-01, 7.583e-02, -3.061e-02, -3.569e-01, 4.847e-02, -6.301e-01, -1.860e-01, -2.049e-01, 1.854e-01, 1.150e-01, 8.011e-02, -1.539e-01, -2.284e-01) * s0_1_2;
	r2 += M4(5.370e-01, 8.620e-01, 4.100e-01, 8.848e-01, 2.677e-01, -8.418e-02, 2.566e-01, 1.189e-02, 2.502e-01, -2.544e-01, -2.360e-01, -4.004e-01, 6.804e-02, 1.656e-01, -5.684e-01, 1.958e-01) * s0_1_2;
	r0 += M4(2.203e-01, 6.620e-02, 5.581e-01, 3.857e-01, -5.105e-01, 3.117e-01, 6.610e-01, 7.482e-01, -1.765e-01, -2.504e-01, -3.383e-01, 1.616e-01, 1.569e-01, -3.263e-02, -2.247e-01, 2.682e-01) * s0_2_0;
	r1 += M4(-4.755e-02, -2.160e-01, 3.750e-01, 1.176e-01, -4.736e-01, -1.301e-01, 2.974e-01, 7.333e-01, 1.535e-01, -8.072e-02, 2.286e-01, 6.146e-01, 1.556e-01, -1.571e-01, -6.318e-02, 6.627e-01) * s0_2_0;
	r2 += M4(6.135e-02, -6.542e-01, -3.905e-01, -5.065e-01, 4.833e-01, -2.496e-01, -3.631e-01, -3.919e-01, -4.655e-01, 1.551e-01, 4.792e-01, 5.366e-02, -6.939e-02, 3.735e-01, 4.503e-01, 1.793e-01) * s0_2_0;
	r0 += M4(-8.504e-01, 1.140e-01, 7.879e-01, 1.251e-01, -2.651e-01, 5.365e-01, 5.840e-01, 8.566e-01, -2.647e-01, -4.137e-01, 2.034e-01, 1.322e-01, 3.842e-01, -4.128e-01, -3.455e-01, 3.753e-01) * s0_2_1;
	r1 += M4(-5.566e-01, 4.217e-01, 4.432e-02, -2.279e-02, -9.848e-02, 2.006e-01, 2.673e-01, 7.122e-01, 2.206e-01, -2.704e-01, -2.145e-01, 5.944e-01, 1.978e-02, -2.368e-01, -4.357e-01, -4.817e-01) * s0_2_1;
	r2 += M4(-2.915e-01, -2.038e-02, -5.732e-01, 3.906e-01, -6.266e-01, 1.804e-01, -7.832e-01, -8.316e-02, -1.067e-01, 1.808e-02, -1.379e-01, -6.694e-01, -1.639e-01, 1.921e-01, -4.831e-01, 1.420e-01) * s0_2_1;
	r0 += M4(-4.150e-01, -9.548e-02, 6.596e-01, 5.651e-01, 3.309e-02, -4.270e-01, 4.249e-02, 3.158e-01, -6.077e-03, 1.491e-01, 3.427e-01, -3.163e-01, 6.046e-02, -4.185e-02, -4.778e-02, 1.801e-01) * s0_2_2;
	r1 += M4(-1.392e-01, 9.232e-02, 1.868e-01, 6.026e-01, 5.838e-02, -1.187e-01, -8.951e-02, 1.968e-01, -8.556e-02, 2.010e-01, -1.693e-01, 2.111e-01, -1.645e-02, -6.625e-02, -4.349e-03, -2.504e-02) * s0_2_2;
	r2 += M4(-4.454e-01, -3.750e-01, -5.101e-02, -1.396e-01, -1.154e-02, -2.251e-01, 9.889e-02, -1.381e-01, 1.060e-02, -2.040e-01, 1.127e-01, 3.613e-01, 2.862e-01, 1.388e-01, 1.269e-01, 2.062e-02) * s0_2_2;
	// Per accumulator: add the bias vector, clamp negatives to zero, then
	// store as 32-bit vec4 at output texel opos + (0|1|2, 0).
	r0 += V4(-5.355e-02, -2.641e-02, -4.972e-02, -2.863e-02);
	r0 = max(r0, V4(0.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-1.941e-02, -2.420e-02, -3.968e-02, -2.284e-02);
	r1 = max(r1, V4(0.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(1.763e-02, -1.955e-02, -1.610e-02, -2.207e-02);
	r2 = max(r2, V4(0.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_fast_DS] -conv2
//!HOOK LUMA
//!COMPUTE 16 8 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "-conv2": reads the texture `conv1` (LUMA is bound as well and provides
// LUMA_size) and saves its result as `conv2`, which is 2x LUMA width, LUMA
// height, 4 components per texel -- one texel fewer per pixel than the input.
// The WHEN line is a reverse-Polish expression: the pass runs only if
// (OUTPUT.w > LUMA.w * 1.2) and (OUTPUT.h > LUMA.h * 1.2).
// NOTE: keep the directive lines above contiguous and do not put the directive
// marker inside ordinary comments -- the host parser keys on it.
//
// Precision selection: half-precision types when the float16 extension macro is
// defined, regular 32-bit float types otherwise.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0 / l1 / l2 (x, y): the first / second / third 4-channel texel that `conv1`
// stores for the source pixel at pos + (x, y). The pixel coordinate is clamped
// to [0, sz] first, then mapped to texel x = 3 * pixel.x + {0, 1, 2}; the value
// is scaled by conv1_mul and converted to V4. Expects `pos` and `sz` to be in
// scope where the macros are expanded (hook() defines both).
#define l0(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile: 3 planes (one per macro above) of 10x10 V4 samples.
// hook() indexes each plane with xy + 0..2 where xy is in 0..7.
shared V4 G[3][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1;
	r0 = V4(0.0); r1 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(-6.562e-02, -7.770e-03, -3.766e-01, -6.886e-02, 8.751e-02, 2.399e-02, -2.640e-01, 1.343e-02, 3.002e-02, 2.189e-03, -7.962e-02, 1.930e-02, 1.123e-02, 7.868e-03, -8.574e-03, -1.799e-02) * s0_0_0;
	r1 += M4(-3.880e-02, -4.792e-02, -2.781e-02, 7.407e-02, -5.046e-03, 2.432e-02, 7.947e-04, -1.250e-02, 1.351e-02, 1.432e-02, 4.624e-03, -5.064e-02, -2.118e-02, -1.731e-02, 9.639e-03, 5.379e-02) * s0_0_0;
	r0 += M4(-3.247e-02, 3.155e-02, -3.726e-02, 1.704e-01, 2.070e-02, 4.453e-02, -5.553e-02, -9.534e-02, 6.519e-03, -1.752e-02, 4.971e-03, 4.728e-04, 2.983e-02, 4.972e-02, -1.376e-01, 5.244e-02) * s0_0_1;
	r1 += M4(8.625e-02, 1.989e-01, 6.049e-02, 1.432e-01, 3.426e-02, -7.495e-02, 5.505e-02, 2.078e-02, 1.647e-02, -9.789e-02, 2.210e-02, -9.938e-02, -4.833e-02, 6.193e-02, -8.374e-02, -1.711e-02) * s0_0_1;
	r0 += M4(1.426e-02, 1.567e-02, -1.562e-02, 5.777e-02, 6.015e-03, 2.984e-02, 2.618e-02, -1.999e-02, -7.128e-03, -2.634e-02, 2.266e-02, -6.863e-02, 4.281e-03, 4.625e-02, 1.629e-02, 5.357e-02) * s0_0_2;
	r1 += M4(1.501e-02, 1.153e-01, 1.835e-03, -1.260e-02, -8.148e-03, -3.482e-02, -5.734e-03, -2.364e-02, 2.706e-02, -5.031e-03, 2.840e-02, 5.301e-02, 2.523e-02, 4.188e-02, 2.036e-02, 4.105e-02) * s0_0_2;
	r0 += M4(5.079e-03, 4.599e-02, -1.538e-01, 7.374e-03, -1.899e-01, 1.129e-03, 4.218e-01, 2.665e-02, -8.179e-02, 3.991e-02, -1.987e-01, -1.717e-02, 6.865e-02, 3.531e-02, 1.848e-01, -7.280e-02) * s0_1_0;
	r1 += M4(1.134e-01, 1.069e-01, 2.043e-01, -8.028e-02, 3.945e-02, 4.746e-02, -1.858e-02, 9.215e-03, 6.714e-02, -2.539e-02, 1.255e-01, 1.733e-01, -2.299e-02, -2.769e-02, 1.305e-01, 1.308e-01) * s0_1_0;
	r0 += M4(-4.639e-02, -3.139e-01, 1.349e-01, -4.672e-01, -5.171e-02, -3.213e-01, -3.956e-01, -1.792e-01, -4.265e-03, 2.510e-01, 9.220e-02, 3.292e-01, -8.726e-02, 3.838e-01, -2.630e-01, 2.134e-01) * s0_1_1;
	r1 += M4(-3.563e-01, -9.199e-01, -3.555e-01, -4.913e-01, -3.252e-01, -2.788e-01, -2.686e-01, -2.390e-01, 3.984e-02, 4.014e-01, -2.509e-01, 1.969e-02, 3.291e-01, 2.307e-01, 2.017e-01, 1.519e-01) * s0_1_1;
	r0 += M4(-2.085e-02, -2.311e-01, 3.795e-02, -1.510e-01, -1.716e-02, 1.217e-01, -6.189e-02, 1.388e-01, -6.073e-03, -1.085e-01, 1.422e-02, -4.879e-02, -3.149e-02, 8.988e-02, -1.139e-01, -4.367e-03) * s0_1_2;
	r1 += M4(-1.180e-01, -3.096e-01, -1.597e-01, -2.530e-01, 3.721e-02, 1.323e-01, -3.641e-02, 3.750e-02, -1.082e-01, -8.434e-02, 1.344e-02, -1.707e-02, -5.465e-05, -2.242e-02, 1.303e-02, 7.310e-02) * s0_1_2;
	r0 += M4(-3.108e-01, -5.760e-02, -3.603e-02, 3.603e-04, 1.833e-02, 9.089e-03, -1.021e-03, -2.282e-02, 2.033e-01, -8.438e-02, -1.635e-01, 1.722e-02, 2.098e-02, 5.137e-03, 5.735e-02, 5.234e-03) * s0_2_0;
	r1 += M4(-2.354e-02, 2.991e-02, -3.271e-01, -2.261e-01, 1.905e-02, -4.157e-03, 1.220e-02, 1.022e-02, -1.090e-01, -3.044e-02, -8.423e-02, -1.568e-01, -4.792e-03, 5.417e-03, 3.161e-02, 5.481e-02) * s0_2_0;
	r0 += M4(-1.469e-01, -1.382e-01, -4.607e-02, -1.224e-01, 2.160e-02, -2.180e-02, -1.383e-01, -2.719e-02, -1.450e-01, 3.788e-02, -7.713e-02, -7.106e-02, -4.040e-02, -4.473e-02, 1.436e-02, 6.326e-03) * s0_2_1;
	r1 += M4(-1.997e-01, -1.450e-01, -1.522e-02, -1.218e-01, 1.229e-02, 4.231e-02, 4.686e-02, 1.198e-02, 2.512e-01, -2.951e-03, 1.071e-01, 6.127e-02, -1.094e-02, 6.144e-03, -9.108e-02, -4.456e-02) * s0_2_1;
	r0 += M4(-1.827e-02, -1.454e-02, -2.646e-02, 1.463e-02, -8.757e-03, 7.445e-02, -4.722e-02, 9.936e-02, -6.701e-03, 3.163e-02, 6.809e-03, 5.972e-02, 1.141e-02, -2.868e-02, -6.845e-03, -7.972e-02) * s0_2_2;
	r1 += M4(-1.665e-02, -4.891e-02, -5.179e-02, 3.975e-03, 1.113e-01, 6.134e-02, 9.992e-03, -2.918e-03, -1.343e-01, -9.565e-02, -7.885e-02, -6.190e-02, -7.342e-02, -5.751e-02, -8.073e-03, -3.061e-02) * s0_2_2;
	r0 += M4(6.599e-03, 9.273e-03, -5.455e-02, 1.356e-02, 8.186e-02, 1.376e-02, -3.538e-02, 1.805e-02, -5.620e-02, 9.381e-03, 6.639e-03, -2.852e-02, -9.587e-02, 2.325e-02, 6.305e-02, 5.412e-03) * s1_0_0;
	r1 += M4(4.151e-03, 4.725e-02, -6.340e-03, 1.903e-02, 4.379e-02, 6.211e-02, 5.462e-02, -2.102e-02, -2.458e-02, -3.505e-02, -3.762e-02, 4.579e-02, -9.132e-03, -1.231e-02, -5.744e-02, -2.579e-02) * s1_0_0;
	r0 += M4(4.501e-02, 3.868e-02, 3.130e-03, 1.186e-02, 5.433e-02, -6.758e-02, -1.948e-01, -1.464e-02, -5.341e-02, 2.291e-02, 1.155e-01, -4.707e-02, -5.576e-02, -2.730e-02, 5.960e-02, -6.150e-02) * s1_0_1;
	r1 += M4(-3.275e-03, -9.139e-03, 1.908e-03, 4.165e-03, 9.448e-02, -7.688e-02, 1.478e-01, 2.238e-02, -6.238e-02, 1.397e-02, -6.567e-02, 5.440e-02, -1.390e-01, -1.525e-01, -1.102e-01, -1.008e-01) * s1_0_1;
	r0 += M4(-7.989e-03, 2.666e-02, 8.569e-03, 4.089e-02, 5.941e-03, 7.356e-02, -1.554e-01, 4.171e-02, 9.189e-03, -4.885e-02, 1.470e-01, -4.907e-02, 1.588e-02, -7.723e-02, 2.389e-02, -2.026e-01) * s1_0_2;
	r1 += M4(3.532e-02, 3.513e-02, 1.710e-02, 2.790e-02, 1.451e-01, 1.529e-01, 1.743e-01, 1.958e-01, -1.189e-01, -3.656e-02, -5.902e-02, -6.842e-02, -4.682e-02, -1.450e-01, -5.457e-02, -1.052e-01) * s1_0_2;
	r0 += M4(1.095e-01, 8.683e-02, 2.144e-01, -2.828e-02, -1.845e-01, 4.698e-02, 1.771e-01, -2.613e-02, 4.581e-03, -1.392e-01, -1.354e-01, -4.589e-02, -1.413e-01, 1.144e-02, 2.212e-01, -1.974e-03) * s1_1_0;
	r1 += M4(6.675e-02, 7.544e-02, 8.786e-02, 9.112e-02, 2.118e-02, 9.403e-03, -1.028e-01, -6.564e-03, -3.587e-02, -9.377e-02, -2.171e-02, -1.077e-01, -1.032e-02, -2.829e-02, 1.445e-02, 1.250e-02) * s1_1_0;
	r0 += M4(-2.727e-02, 1.528e-01, 1.378e-01, 3.466e-01, 2.046e-01, -8.423e-02, 1.775e-01, 6.738e-02, 4.950e-02, -2.181e-01, 2.771e-01, -2.000e-01, -4.197e-01, 6.866e-02, 3.926e-01, 1.215e-02) * s1_1_1;
	r1 += M4(2.049e-01, 1.915e-01, 2.212e-01, 1.343e-01, -3.740e-01, -1.367e-01, -3.220e-01, -1.938e-01, -3.849e-02, -3.742e-01, 9.892e-02, -3.467e-01, -6.565e-02, 8.049e-02, -3.545e-01, 3.940e-02) * s1_1_1;
	r0 += M4(2.988e-03, 7.182e-02, -7.620e-02, 1.806e-02, -1.533e-02, 2.139e-02, -1.529e-01, -9.597e-02, -3.832e-03, -9.500e-02, 1.745e-01, -3.604e-01, -5.326e-02, -1.433e-01, -1.949e-02, -6.909e-02) * s1_1_2;
	r1 += M4(1.117e-01, 7.446e-02, 5.715e-02, 7.410e-02, 2.680e-01, 2.477e-01, 3.900e-01, 4.335e-01, 6.715e-02, -2.245e-01, 4.288e-02, 5.352e-02, -5.410e-01, -1.116e-01, -2.609e-01, -4.164e-02) * s1_1_2;
	r0 += M4(1.772e-01, 3.265e-02, 6.131e-02, -2.352e-02, 2.778e-02, -4.066e-03, 1.399e-02, -1.887e-02, -2.318e-01, -6.253e-02, 1.334e-02, -4.791e-03, 2.375e-02, -2.487e-02, 3.697e-02, -1.178e-02) * s1_2_0;
	r1 += M4(7.069e-02, 6.492e-02, 8.567e-02, 6.403e-02, -2.747e-02, -2.201e-02, 4.402e-02, 3.770e-02, -8.968e-02, -7.007e-02, -4.488e-02, -8.232e-02, -1.607e-02, -2.890e-02, -9.468e-03, -1.546e-02) * s1_2_0;
	r0 += M4(2.455e-01, 7.462e-02, -4.095e-02, 1.670e-01, 8.810e-02, 1.194e-01, -1.941e-02, 1.428e-02, -1.294e-01, -2.269e-01, 2.027e-01, -2.568e-01, 4.605e-02, 3.023e-02, -6.246e-02, -1.455e-02) * s1_2_1;
	r1 += M4(1.581e-02, -1.130e-01, -7.353e-02, -1.606e-01, 8.962e-02, 3.833e-02, 1.284e-01, 1.242e-01, -9.379e-02, -1.339e-01, -2.858e-01, -7.272e-02, 3.358e-02, -2.895e-04, 9.175e-02, 4.212e-02) * s1_2_1;
	r0 += M4(-8.290e-03, -7.852e-02, 2.646e-02, -1.330e-01, 2.096e-02, 7.593e-02, -3.089e-03, -1.614e-02, -9.240e-03, -3.074e-01, 1.119e-01, -5.570e-01, 2.328e-02, 8.825e-02, -2.292e-02, 7.939e-03) * s1_2_2;
	r1 += M4(6.553e-02, -1.999e-02, 1.401e-01, 2.918e-02, 1.065e-01, 6.703e-02, 1.984e-01, 1.433e-01, -4.834e-01, -3.916e-01, -8.716e-02, -1.495e-01, 1.053e-01, 2.787e-02, 1.013e-01, 5.137e-02) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(3.749e-03, -1.902e-03, 1.214e-01, 2.787e-02, -7.052e-02, -1.573e-02, 2.033e-02, -1.660e-02, -8.274e-02, -2.606e-02, -3.505e-03, -9.618e-03, -4.259e-02, -6.421e-02, 1.053e-02, -8.298e-03) * s0_0_0;
	r1 += M4(3.621e-02, 3.964e-02, 7.785e-03, -6.514e-02, -7.337e-03, 1.691e-02, 1.126e-02, 2.773e-02, -3.690e-03, -2.234e-02, 9.264e-03, -1.528e-02, -2.019e-02, -9.243e-02, -7.597e-02, -6.262e-02) * s0_0_0;
	r0 += M4(4.148e-02, -1.051e-01, 4.375e-02, -8.209e-02, 6.855e-02, -4.868e-02, 1.842e-01, -1.822e-02, 4.367e-03, -1.054e-01, -2.667e-02, -1.029e-01, -7.147e-02, 2.172e-02, -8.426e-02, -7.388e-02) * s0_0_1;
	r1 += M4(-9.205e-02, -2.004e-01, -1.278e-01, -2.922e-01, -2.827e-03, -5.736e-02, 4.871e-02, -9.163e-02, -1.083e-01, -1.396e-01, -8.305e-02, -1.004e-01, -1.096e-01, 4.459e-02, -1.302e-01, -5.885e-02) * s0_0_1;
	r0 += M4(1.725e-04, -7.909e-02, 5.263e-02, -9.703e-02, 7.266e-03, -7.254e-02, 8.340e-04, -2.427e-02, -1.407e-02, -7.318e-02, -1.987e-02, -1.597e-01, 1.202e-02, -1.024e-02, -6.964e-02, -5.864e-02) * s0_0_2;
	r1 += M4(-1.122e-02, -1.570e-01, 2.434e-02, -5.537e-02, 5.709e-02, -1.397e-02, 3.358e-02, -2.052e-02, -5.920e-02, -1.328e-01, -2.716e-02, -9.046e-02, -8.466e-02, -1.637e-01, -4.456e-02, -3.970e-02) * s0_0_2;
	r0 += M4(-2.281e-01, -5.448e-03, -4.145e-01, 2.928e-02, -2.340e-01, -1.768e-02, -1.087e-01, -5.502e-03, -2.144e-01, -8.362e-02, -2.444e-01, -6.687e-03, 4.456e-01, 5.326e-02, -4.337e-01, 6.168e-02) * s0_1_0;
	r1 += M4(6.825e-03, 3.503e-02, -1.066e-01, -2.778e-02, -3.304e-02, -1.981e-03, -1.097e-01, -1.199e-01, -4.562e-02, -3.932e-02, -5.603e-02, -5.627e-02, 7.249e-02, 1.160e-01, 2.061e-01, 1.252e-01) * s0_1_0;
	r0 += M4(-1.734e-01, -1.379e-01, -7.026e-02, -1.181e-01, -1.442e-01, -2.700e-01, 1.781e-01, -3.486e-01, -2.328e-01, -3.018e-01, -2.729e-01, -2.173e-01, 6.618e-02, 3.399e-01, -4.538e-02, 5.738e-01) * s0_1_1;
	r1 += M4(-3.389e-01, -2.725e-01, -2.473e-01, -2.144e-01, -5.137e-01, -3.111e-01, -2.202e-01, 2.486e-01, -2.714e-01, -1.900e-01, -3.174e-01, -2.222e-01, 6.692e-01, 2.828e-01, 2.222e-01, -2.090e-01) * s0_1_1;
	r0 += M4(8.975e-03, 2.868e-02, 8.569e-02, 6.434e-02, 8.692e-03, 1.417e-01, 3.446e-02, 1.673e-01, -3.627e-03, -3.740e-01, -2.174e-02, -5.215e-01, 7.848e-03, -2.838e-02, -3.146e-02, -1.831e-01) * s0_1_2;
	r1 += M4(-1.753e-01, -9.422e-02, -2.348e-01, -1.927e-01, 8.080e-02, 3.096e-01, -8.376e-04, 2.874e-02, -5.768e-01, -6.152e-01, -5.333e-01, -5.098e-01, -1.098e-01, -5.479e-02, 8.132e-02, 1.468e-01) * s0_1_2;
	r0 += M4(-1.704e-01, 2.470e-02, -2.060e-03, 8.171e-03, -3.994e-02, 2.944e-03, 9.153e-02, -1.675e-02, 2.672e-02, 4.868e-03, -1.714e-01, 8.039e-03, 7.876e-02, 5.384e-02, -3.627e-02, 3.518e-02) * s0_2_0;
	r1 += M4(1.228e-02, -1.005e-02, -2.091e-02, -2.480e-02, 1.847e-02, 8.444e-03, -9.253e-02, -5.457e-02, -1.038e-02, 1.273e-02, 7.762e-04, 5.577e-04, 1.184e-02, 4.763e-02, 1.523e-02, 7.652e-02) * s0_2_0;
	r0 += M4(-5.653e-02, -1.532e-01, 1.477e-01, -2.770e-02, 1.390e-01, -1.011e-01, -1.737e-02, -1.519e-01, 5.456e-02, -9.067e-02, -1.604e-01, 8.654e-03, -1.519e-01, 1.309e-01, 1.310e-02, 1.724e-01) * s0_2_1;
	r1 += M4(-2.298e-01, -1.164e-01, -2.764e-01, -2.588e-01, -4.605e-02, -1.079e-01, 2.646e-01, -1.003e-03, 1.441e-02, 3.872e-02, 4.322e-02, 3.383e-04, 3.847e-02, 1.489e-01, -1.188e-01, 8.879e-02) * s0_2_1;
	r0 += M4(-3.220e-03, -1.273e-01, 2.180e-02, -1.155e-01, -1.515e-02, -6.519e-02, 3.488e-02, 3.655e-02, 1.489e-02, -4.472e-02, 3.345e-02, 3.664e-03, -2.274e-03, 7.430e-02, -6.768e-02, 3.419e-02) * s0_2_2;
	r1 += M4(-1.279e-01, -9.333e-02, -1.333e-01, -9.873e-02, 5.796e-02, -1.052e-01, -1.294e-01, -1.671e-01, 3.263e-02, 2.249e-02, 2.313e-02, -1.449e-03, -7.182e-03, 4.593e-02, 5.052e-02, 9.177e-02) * s0_2_2;
	r0 += V4(7.120e-04, 2.604e-03, 1.120e-02, 2.116e-03);
	r0 = max(r0, V4(0.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(2.694e-03, 3.142e-03, 4.015e-03, 5.124e-03);
	r1 = max(r1, V4(0.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
}

//!DESC [CuNNy_fast_DS] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv2
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0 / l1 fetch one texel of conv2 for the position pos + (x, y), clamped to
// the range [0, sz]. The x coordinate is scaled by 2, so every position owns
// two horizontally adjacent conv2 texels: l0 reads the first (offset 0) and
// l1 the second (offset 1). The fetched value is multiplied by conv2_mul and
// converted to V4.
#define l0(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(2, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(2, 1) + ivec2(1, 0), 0)))
// Workgroup-shared tile cache: plane 0 holds l0 samples, plane 1 holds l1
// samples. 10x10 = the 8x8 positions of one workgroup plus a one-texel border
// on every side, as needed by the 3x3 window read below.
shared V4 G[2][10][10];
// Output pass: evaluates a 3x3 window over both cached conv2 planes (18 taps,
// each a 4x4 weight matrix times a 4-component sample), then writes the four
// components of the result as a 2x2 block of output pixels, each added to a
// LUMA sample taken at that output pixel's location.
void hook() {
	// xy: invocation index inside the workgroup; pos: position on the LUMA
	// grid; opos: top-left corner of the 2x2 output block owned by pos.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 2);
	// Largest valid coordinate, used as the upper clamp bound in l0 / l1.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative tile fill: each invocation stores up to four entries per
	// plane (offsets 0 and 8 along each axis); the breaks skip indices that
	// would fall outside the 10x10 tile. The "- 1" shifts make tile entry
	// [xy.y + 1][xy.x + 1] correspond to pos itself.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
		}
	}
	// Every tile entry must be written before any invocation reads the
	// entries stored by its neighbours.
	barrier();
	// sP_R_C: sample from plane P at row offset R and column offset C of the
	// 3x3 window; sP_1_1 is the sample at pos.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0;
	r0 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Accumulate the nine taps of plane 0 (one weight matrix per tap).
	r0 += M4(8.367e-04, 3.418e-04, 2.753e-03, 1.262e-03, 6.641e-02, 1.588e-02, 7.737e-03, 7.618e-03, 2.245e-03, 2.971e-03, 6.250e-03, 4.485e-03, -2.634e-02, -6.611e-03, 1.609e-03, -2.612e-03) * s0_0_0;
	r0 += M4(-1.382e-01, -2.254e-02, -1.933e-03, -4.501e-03, 1.212e-01, 1.567e-01, -2.740e-03, 3.883e-03, -1.085e-02, -4.038e-03, -3.825e-03, 8.270e-03, -7.709e-02, -3.653e-02, 1.498e-03, 2.610e-03) * s0_0_1;
	r0 += M4(-1.738e-01, -2.998e-01, -1.813e-02, -1.846e-02, -7.187e-04, 3.746e-02, -2.336e-03, 4.134e-04, 2.661e-03, -2.190e-03, -2.805e-02, -3.186e-02, 2.709e-03, -2.030e-02, 2.941e-03, 3.595e-03) * s0_0_2;
	r0 += M4(9.860e-05, 6.208e-03, -7.769e-04, 3.451e-03, -1.828e-01, -1.513e-02, -1.421e-01, -2.109e-02, -6.373e-03, 1.224e-03, -6.534e-03, 2.165e-03, -2.646e-01, -2.946e-02, -1.950e-01, -2.545e-02) * s0_1_0;
	r0 += M4(1.890e-02, -1.995e-02, 8.862e-02, 1.423e-02, 9.644e-02, -3.492e-01, 2.095e-01, 2.432e-03, 8.343e-03, -3.085e-02, 6.730e-03, -3.022e-02, -1.913e-02, 4.352e-01, -1.038e-01, 1.944e-01) * s0_1_1;
	r0 += M4(-3.157e-03, 4.730e-02, 2.264e-01, 2.686e-01, 4.420e-03, 5.768e-02, 2.212e-04, 7.554e-02, 1.091e-01, 1.086e-01, 1.047e-01, 1.032e-01, -5.512e-03, -2.680e-02, -8.009e-04, -3.918e-02) * s0_1_2;
	r0 += M4(1.805e-03, 6.204e-04, 5.042e-03, 2.847e-03, -5.171e-03, -1.513e-03, -4.456e-02, 2.166e-03, 3.317e-03, 3.381e-03, -1.679e-03, 1.177e-03, 3.585e-02, 7.146e-03, -1.126e-02, 6.312e-03) * s0_2_0;
	r0 += M4(1.895e-03, 2.539e-03, -1.010e-02, 2.173e-04, 1.245e-02, -6.488e-03, -5.749e-02, -1.306e-01, -1.067e-02, -5.183e-04, -1.427e-02, -1.411e-02, -9.165e-03, 2.947e-02, 4.285e-02, 2.017e-01) * s0_2_1;
	r0 += M4(-1.836e-03, -4.604e-04, 5.817e-03, -1.163e-03, -6.908e-05, 2.656e-02, 1.538e-04, 2.544e-02, -2.570e-02, -3.043e-02, 9.186e-03, 6.060e-03, 1.835e-03, -1.180e-02, -7.798e-04, -1.413e-02) * s0_2_2;
	// Accumulate the nine taps of plane 1.
	r0 += M4(3.858e-02, 1.159e-02, -1.714e-02, 1.321e-03, -4.651e-02, -6.978e-03, -6.425e-04, -5.179e-04, 1.593e-02, -4.188e-03, 7.966e-03, -2.096e-03, 2.021e-02, -2.831e-03, -6.218e-03, -1.591e-03) * s1_0_0;
	r0 += M4(6.013e-02, 2.321e-02, -1.272e-02, 1.428e-03, -2.642e-02, -5.970e-02, 5.484e-04, -1.624e-02, -9.350e-03, 5.044e-02, 3.559e-02, 1.290e-02, 5.237e-02, 5.335e-02, -1.158e-02, -3.028e-03) * s1_0_1;
	r0 += M4(-3.464e-03, -6.400e-03, -3.912e-03, 4.028e-03, -7.982e-04, 3.897e-03, 5.623e-04, -5.455e-03, -1.578e-02, -5.023e-03, 4.316e-03, 6.878e-03, 2.155e-02, 9.854e-05, -1.545e-03, -8.403e-03) * s1_0_2;
	r0 += M4(6.274e-02, 9.949e-03, -1.482e-02, 1.966e-02, 1.823e-01, 3.239e-02, 1.672e-01, 2.803e-02, 3.854e-02, 2.798e-03, 5.290e-02, -1.323e-02, 8.033e-02, -4.927e-03, 5.214e-02, 6.217e-03) * s1_1_0;
	r0 += M4(1.412e-01, 1.302e-01, 1.831e-01, -7.765e-01, 1.858e-01, -5.863e-01, 8.257e-02, 1.150e-01, 2.036e-01, 1.588e-01, -7.754e-01, 7.013e-02, -6.113e-01, 1.329e-01, 2.431e-01, 1.997e-01) * s1_1_1;
	r0 += M4(4.956e-03, 2.910e-02, -1.130e-02, 3.894e-02, -6.194e-03, 4.187e-02, 1.297e-02, 1.890e-02, -9.858e-03, 3.552e-02, -1.203e-02, -4.467e-02, 7.554e-03, -1.228e-01, 1.355e-02, -3.137e-02) * s1_1_2;
	r0 += M4(-9.285e-04, -3.891e-03, 1.489e-02, 1.933e-03, -2.277e-02, 2.816e-03, 1.509e-02, -1.466e-02, -1.913e-03, 6.877e-04, 2.422e-02, 2.455e-03, 4.516e-03, 3.046e-03, 3.825e-03, -8.076e-03) * s1_2_0;
	r0 += M4(3.489e-03, -1.285e-02, 4.971e-02, 6.487e-02, 1.042e-02, -9.749e-03, -3.724e-02, -1.479e-01, -1.068e-02, 2.582e-03, 6.066e-02, 5.185e-02, -5.724e-03, 7.859e-03, -5.088e-02, 3.267e-02) * s1_2_1;
	r0 += M4(-2.195e-03, -8.528e-03, -1.858e-03, 1.690e-02, -1.236e-03, 7.156e-03, -1.615e-03, 8.697e-03, -3.281e-03, -5.182e-03, 5.321e-03, 5.580e-03, 6.155e-03, -1.604e-02, -3.105e-03, -5.211e-02) * s1_2_2;
	// Constant per-component offset added after all taps.
	r0 += V4(-7.733e-10, -6.961e-10, -2.278e-10, -5.781e-09);
	// Self-assignment with no effect; r0 is stored below without clamping.
	// NOTE(review): presumably a generator placeholder - confirm.
	r0 = r0;
	// NOTE(review): opt is taken to be the extent of one output pixel in
	// normalized coordinates, assuming LUMA_pt is one LUMA texel and the
	// output is twice the LUMA size - confirm against the mpv documentation.
	vec2 opt = 0.5 * LUMA_pt;
	// Normalized coordinate of the centre of output pixel opos.
	vec2 fpos = (vec2(opos) + vec2(0.5)) * opt;
	// Spread r0.xyzw over the 2x2 block as (0,0), (1,0), (0,1), (1,1). Each
	// store adds the LUMA sample at that output pixel and writes the sum to
	// the first component; the rest are constant (0, 0, 1).
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0.x + LUMA_tex(fpos + vec2(0.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r0.y + LUMA_tex(fpos + vec2(1.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r0.z + LUMA_tex(fpos + vec2(0.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r0.w + LUMA_tex(fpos + vec2(1.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
}
